Calculating Track Mileage by UZA

Author

Steven Andrews, Boston Region MPO

Published

June 1, 2023

library(sf)
library(tidyverse)
library(mapview)
library(gtfstools)
library(stringr)
library(gt)
mapviewOptions(fgb = FALSE)

UZA Work

We write a function to grab the files from the Census Bureau site and perform light filtering and manipulation.

sf_from_zip <- function(URL) {
  # based on:
  # https://stackoverflow.com/questions/59740419/unzipping-and-reading-shape-file-in-r-without-rgdal-installed
  
  # Load local temp files
  temp_0 <- tempfile()
  temp_1 <- tempfile()
  
  # download the zip, put it in the first temp directory
  download.file(URL, temp_0)
  
  # Unzip and place in temp_1
  unzip(zipfile = temp_0, exdir = temp_1)
  
  # Read the shapefile.
  geom_sf <- sf::read_sf(temp_1)
  
  out <- geom_sf
}
# Record the URLs.
UZA10_URL <- "https://www2.census.gov/geo/tiger/TIGER2020/UAC/tl_2020_us_uac10.zip"
UZA20_URL <- "https://www2.census.gov/geo/tiger/TIGER2020/UAC/tl_2020_us_uac20.zip"

# Obtain the data.
UZA10 <- sf_from_zip(UZA10_URL)
UZA20 <- sf_from_zip(UZA20_URL)

# Transform to state coordinate system.
UZA10 <- UZA10 |> sf::st_transform(26986)
UZA20 <- UZA20 |> sf::st_transform(26986)

NE <- c("MA", "CT", "RI", "VT", "NH", "ME")
NE_collapse <- paste0(NE,collapse = "|")

# Clean the UZAs for processing. We want to separate out
# clusters (C) and Urbanized Areas (U)
UZA10_NE <- UZA10 |>
  filter(str_detect(NAME10, NE_collapse)) |> 
  mutate(UZA_type = if_else(UATYP10 == "C", 
                            "Census 2010 Cluster", 
                            "Census 2010 Urbanized Area"))

# There are only Urban Areas in 2020.
UZA20_NE <- UZA20 |>
  filter(str_detect(NAME20, NE_collapse))

saveRDS(UZA10_NE, "../data/processed/UZA10_NE.rds")
saveRDS(UZA20_NE, "../data/processed/UZA20_NE.rds")

CR Line Work

We start by downloading the MassGIS “trains” feature. In this dataset Foxboro service is the full line between Walpole and the Providence Line. We want to clip that to Foxboro Station. We use a recent GTFS file for this.

# Read in the trains file.
# This function works only because the lines we want are the first layer. 
# Otherwise, we'd need to change the function to accept a layer name/number.
trains <- sf_from_zip("https://s3.us-east-1.amazonaws.com/download.massgis.digital.mass.gov/shapefiles/state/trains.zip") |> 
  mutate(feature_id = row_number())

# Fix an error: 3235 is a small segment that seems to missclassified on the Lowell Line @ Town Farm Lane towards the north end of the line. 
# This is a very fragile fix because the id is the row_number. 
trains <- trains |> 
  mutate(COMMRAIL = if_else(feature_id == 3235, "Y", COMMRAIL))

# Select passenger trains and "special trains". 
# Exclude yards and Foxboro, which we replace later on. 
# We also don't want the Plymouth Station section of track which is not in use for passenger service.
trains_pass <- trains |> filter(COMMRAIL == "Y" | COMMRAIL == "S", 
                                is.na(YARD),
                                !COMM_LINE == "+fox+",
                                !COMM_LINE %in% c('+OC+PLY-KING+PLY+',
                                                '+OC+KING+'))

mapview(trains_pass, zcol = "COMM_LINE", color = viridis::turbo, legend = FALSE)

Handle Foxboro Station

# This is the last Summer 2022 GTFS schedule. 
# We chose this because it contains the CapeFlyer if we need it. 
gtfs_summ22 <- gtfstools::read_gtfs("https://cdn.mbtace.com/archive/20220812.zip")

# Convert to MA system.
gtfs_summ22_shp <- convert_shapes_to_sf(gtfs_summ22) |> 
  st_transform(26986)

# Select only Foxboro.
gtfs_summ22_trips_fox <- gtfs_summ22$trips |> 
  filter(str_detect(route_id, "Foxboro")) |> 
  distinct(shape_id)

We find the portion of the trains segment that is on the Foxboro branch from a GTFS file.

# Buffer the GTFS shape by 15 meters, which seems to capture the full Foxboro branch. 
gtfs_summ22_fox_buff <- gtfs_summ22_shp |> 
  filter(shape_id %in% gtfs_summ22_trips_fox$shape_id) |> 
  st_buffer(15)

# Select only those train segments for for Foxboro.
trains_fox <- trains |> filter(COMM_LINE == "+fox+")

# Intersect the datasets.
trains_fox_int <- st_intersection(trains_fox, gtfs_summ22_fox_buff)

# Visually inspect the results.
mapview(trains_fox_int) + mapview(gtfs_summ22_fox_buff, col.regions = "grey50")

and we append it to our trains_pass dataset.

# Attach Foxboro to the rest of the dataset.
trains_pass <- bind_rows(trains_pass, trains_fox_int)

# View results
mapview(trains_pass, zcol = "COMM_LINE", color = viridis::turbo, legend = FALSE)
# Save for easy loading.
saveRDS(trains_pass, "../data/processed/trains_pass.rds")

Perform Intersections

Read in the pre-generated datasets.

# Read in trains
trains_pass <- readRDS("../data/processed/trains_pass.rds")

# Read in UZA
UZA10_NE <- readRDS("../data/processed/UZA10_NE.rds")
UZA20_NE <- readRDS("../data/processed/UZA20_NE.rds")

# Create labels for helpful hovering.
labs10 <- UZA10_NE$NAME10
labs20 <- UZA20_NE$NAME20

We perform an intersection over each of the datasets. We map the datasets. Visual inspection appears to show some extra sidings in RI that may be adding extra mileage. More work would need to be done to explore what combination of fields need to be changed to isolate the lines that are used for service.

trains_pass_UZA10 <- st_intersection(trains_pass, UZA10_NE)
trains_pass_UZA20 <- st_intersection(trains_pass, UZA20_NE)

UZA20_MA <- UZA20_NE |> 
  filter(str_detect(NAME20, "MA")) |> 
  select(GEOID20, NAME20)
  
# Display the results for 2020 UZAs.
mapview(trains_pass |> summarize(name = "All Tracks"), 
    layer.name = "All Tracks",
    color = "black",
    lwd = 2) +  
mapview(UZA20_MA |> filter(NAME20 %in% trains_pass_UZA20$NAME20), 
    zcol = "NAME20", 
    col.regions = viridis::turbo, 
    alpha.regions = 0.30,
    layer.name = "UZA 2020: UZA",
    legend = FALSE) + 
mapview(UZA20_MA |> filter(!NAME20 %in% trains_pass_UZA20$NAME20), 
    zcol = "NAME20", 
    col.regions = "grey90", 
    alpha.regions = 0.30,
    layer.name = "UZA 2020: Non UZA",
    legend = FALSE) +
mapview(trains_pass_UZA20 |> 
          group_by(NAME20) |> 
          summarize(), 
    zcol = "NAME20", 
    layer.name = "Tracks by UZA",
    lwd = 3, 
    color = viridis::turbo)

Aggregate by UZA

We want to aggregate the data by UZA. We calculate the length of the track in each UZA, then sum. We subtract from the total to find the Non UZA area.

# Find the total mileage of the CR system. (In meters!)
trains_pass$len <- st_length(trains_pass) |> 
  units::drop_units()

# Aggregate for the total.
trains_pass_len_tot <- sum(trains_pass$len)

# Calculate the length then drop the units for 2010 and 2020.  
trains_pass_UZA10$len <- st_length(trains_pass_UZA10) |> 
  units::drop_units()

trains_pass_UZA20$len <- st_length(trains_pass_UZA20) |> 
  units::drop_units()

#  Summarize 2010 by UZA
trains_pass_UZA10_summ <- trains_pass_UZA10 |> 
  group_by(NAME10) |> 
  summarize(len_tot = sum(len)) |> 
  st_drop_geometry()

# Find length in UZAs
UZA10_len <- sum(trains_pass_UZA10_summ$len_tot)

# Subtract and attach the non-UZA length. Make a fresh row to append.
trains_pass_UZA10_summ <- bind_rows(
  trains_pass_UZA10_summ,
  tibble(NAME10 = "NonUZA", 
         len_tot = trains_pass_len_tot - UZA10_len))

# Calculate the percentage by UZA.
trains_pass_UZA10_summ <- trains_pass_UZA10_summ |> 
  mutate(pct = len_tot/sum(len_tot))

# Check Delta
sum(trains_pass_UZA10_summ$len_tot) - trains_pass_len_tot
[1] 2.328306e-10
# Summarize 2020 by Geometry ------------
trains_pass_UZA20_summ <- trains_pass_UZA20 |> 
  group_by(NAME20) |> 
  summarize(len_tot = sum(len)) |> 
  st_drop_geometry()

# Find length in UZAs
UZA20_len <- sum(trains_pass_UZA20_summ$len_tot)

# Subtract and attach the non-UZA length. Make a fresh row to append.
trains_pass_UZA20_summ <- bind_rows(
  trains_pass_UZA20_summ,
  tibble(NAME20 = "NonUZA", 
         len_tot = trains_pass_len_tot - UZA20_len))

trains_pass_UZA20_summ <- trains_pass_UZA20_summ |> 
  mutate(pct = len_tot/sum(len_tot))

# Check Delta
sum(trains_pass_UZA20_summ$len_tot) - trains_pass_len_tot
[1] 0

Label and convert the units then export the data to CSVs.

# Label Units.
trains_pass_UZA10_summ <- trains_pass_UZA10_summ |> 
  rename(len_tot_m = len_tot) |> 
  mutate(len_tot_mi = measurements::conv_unit(len_tot_m, "m", "mi")) |> 
  select(NAME10, len_tot_m, len_tot_mi, pct)

trains_pass_UZA20_summ <- trains_pass_UZA20_summ |> 
  rename(len_tot_m = len_tot) |> 
  mutate(len_tot_mi = measurements::conv_unit(len_tot_m, "m", "mi")) |> 
  select(NAME20, len_tot_m, len_tot_mi, pct)

write_csv(trains_pass_UZA10_summ, "../output/trains_pass_UZA10_summ.csv")

write_csv(trains_pass_UZA20_summ, "../output/trains_pass_UZA20_summ.csv")
NAME20 len_tot_m len_tot_mi pct
Boston, MA--NH 796,328.62 494.82 67.48%
Providence, RI--MA 160,242.31 99.57 13.58%
NonUZA 92,491.84 57.47 7.84%
Barnstable Town, MA 47,243.40 29.36 4.00%
Worcester, MA--CT 43,023.25 26.73 3.65%
Leominster--Fitchburg, MA 34,971.80 21.73 2.96%
Ipswich, MA 5,819.66 3.62 0.49%
Total 1,180,120.87 733.29 100.00%
NAME10 len_tot_m len_tot_mi pct
Boston, MA--NH--RI 816,759.96 507.51 69.21%
Providence, RI--MA 145,983.02 90.71 12.37%
NonUZA 89,295.72 55.49 7.57%
Barnstable Town, MA 48,828.00 30.34 4.14%
Worcester, MA--CT 40,533.80 25.19 3.43%
Leominster--Fitchburg, MA 38,720.37 24.06 3.28%
Total 1,180,120.87 733.29 100.00%